{
 "cells": [
  {
   "cell_type": "code",
   "id": "7cb23ea4-fca4-445f-92a7-5888f1bbe6a1",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-11-17T12:41:06.537171Z",
     "iopub.status.busy": "2024-11-17T12:41:06.536920Z",
     "iopub.status.idle": "2024-11-17T12:41:06.546166Z",
     "shell.execute_reply": "2024-11-17T12:41:06.545420Z",
     "shell.execute_reply.started": "2024-11-17T12:41:06.537150Z"
    }
   },
   "source": [
    "%env LLM_BASE_URL=https://dashscope.aliyuncs.com/compatible-mode/v1\n",
    "%env LLM_API_KEY=替换为自己的Qwen API Key，如果不需要评估，则不需要"
   ],
   "outputs": [],
   "execution_count": null
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "0dc2dd8a-ca1f-44b1-95cb-68d21a407fe0",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-11-17T12:41:06.547817Z",
     "iopub.status.busy": "2024-11-17T12:41:06.547304Z",
     "iopub.status.idle": "2024-11-17T12:41:07.069894Z",
     "shell.execute_reply": "2024-11-17T12:41:07.069393Z",
     "shell.execute_reply.started": "2024-11-17T12:41:06.547792Z"
    }
   },
   "outputs": [],
   "source": [
    "import os\n",
    "import pandas as pd\n",
    "from openai import OpenAI"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "318a43db-8b14-455c-b668-3fe1c446214c",
   "metadata": {},
   "source": [
    "# 加载数据"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "81a68698-5063-4302-9524-0c30759813e0",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-11-17T12:41:07.070476Z",
     "iopub.status.busy": "2024-11-17T12:41:07.070293Z",
     "iopub.status.idle": "2024-11-17T12:41:07.072889Z",
     "shell.execute_reply": "2024-11-17T12:41:07.072581Z",
     "shell.execute_reply.started": "2024-11-17T12:41:07.070462Z"
    }
   },
   "outputs": [],
   "source": [
    "# Experiment tag; it also names this run's folder under ../experiments\n",
    "expr_version = 'product_v03_flowise_rrf'\n",
    "\n",
    "# Both folders are resolved relative to the parent of the current working directory\n",
    "expr_dir = os.path.join(os.pardir, 'experiments', expr_version)\n",
    "preprocess_output_dir = os.path.join(os.pardir, 'outputs', 'v1_20240713')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "c47b7af8-b390-420a-8a72-22e52c19861b",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-11-17T12:41:07.073810Z",
     "iopub.status.busy": "2024-11-17T12:41:07.073683Z",
     "iopub.status.idle": "2024-11-17T12:41:07.192811Z",
     "shell.execute_reply": "2024-11-17T12:41:07.192345Z",
     "shell.execute_reply.started": "2024-11-17T12:41:07.073798Z"
    }
   },
   "outputs": [],
   "source": [
    "# Read the full question/answer sheet produced by the preprocessing step\n",
    "qa_df = pd.read_excel(os.path.join(preprocess_output_dir, 'question_answer.xlsx'))\n",
    "\n",
    "# Keep only the test split; the original answer becomes the reference answer\n",
    "prediction_df = (\n",
    "    qa_df.loc[qa_df['dataset'] == 'test', ['uuid', 'question', 'qa_type', 'answer']]\n",
    "    .rename(columns={'answer': 'ref_answer'})\n",
    ")"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "eaf2c9aa-a597-47eb-9f6d-6e37ae48d55d",
   "metadata": {},
   "source": [
    "# 调用Flowise API"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "9255751c-9441-497f-b502-27fd362b6471",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-11-17T12:41:07.193418Z",
     "iopub.status.busy": "2024-11-17T12:41:07.193249Z",
     "iopub.status.idle": "2024-11-17T12:41:07.262748Z",
     "shell.execute_reply": "2024-11-17T12:41:07.262289Z",
     "shell.execute_reply.started": "2024-11-17T12:41:07.193405Z"
    }
   },
   "outputs": [],
   "source": [
    "import requests\n",
    "\n",
    "API_URL = \"http://192.168.31.92:3000/api/v1/prediction/f2740f69-4244-4438-ad68-752811a3fc7e\"\n",
    "\n",
    "def rag(question, timeout=120):\n",
    "    \"\"\"\n",
    "    Send one question to the Flowise prediction endpoint and return its JSON reply.\n",
    "\n",
    "    :param question: question text, posted as the ``question`` field of the JSON payload\n",
    "    :param timeout: seconds to wait for the server before giving up (handed to requests)\n",
    "    :return: decoded JSON body of the response\n",
    "    :raises requests.HTTPError: if the server answers with a 4xx/5xx status\n",
    "    :raises requests.Timeout: if no response arrives within ``timeout`` seconds\n",
    "    \"\"\"\n",
    "    payload = {\"question\": question}\n",
    "    # Without a timeout requests waits indefinitely, so one hung call would stall a whole batch\n",
    "    response = requests.post(API_URL, json=payload, timeout=timeout)\n",
    "    # Fail here on an HTTP error instead of returning an error body that lacks the 'text' field\n",
    "    response.raise_for_status()\n",
    "    return response.json()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "id": "810bb385-442f-4c0f-a905-5c693e47709c",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-11-17T12:41:07.263472Z",
     "iopub.status.busy": "2024-11-17T12:41:07.263339Z",
     "iopub.status.idle": "2024-11-17T12:41:16.048151Z",
     "shell.execute_reply": "2024-11-17T12:41:16.047678Z",
     "shell.execute_reply.started": "2024-11-17T12:41:07.263460Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'text': '报告日期：2023年12月12日',\n",
       " 'question': '这份报告的发布时间是什么时候',\n",
       " 'chatId': 'ed6b5627-3556-4522-bd75-c4c3607f40bf',\n",
       " 'chatMessageId': 'c11a3d85-06a6-4ee6-a070-4a639914b74a',\n",
       " 'isStreamValid': False,\n",
       " 'sessionId': 'ed6b5627-3556-4522-bd75-c4c3607f40bf'}"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "rag('这份报告的发布时间是什么时候')"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "e6c18999-b35d-4b71-93df-5bd71ba6d82a",
   "metadata": {},
   "source": [
    "# 批量预测"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "id": "e133fcfc-d0d1-4ffb-8818-436a5555ea51",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-11-17T12:41:16.048717Z",
     "iopub.status.busy": "2024-11-17T12:41:16.048591Z",
     "iopub.status.idle": "2024-11-17T12:54:32.175491Z",
     "shell.execute_reply": "2024-11-17T12:54:32.174997Z",
     "shell.execute_reply.started": "2024-11-17T12:41:16.048705Z"
    }
   },
   "outputs": [
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "279e2fd8ca584bc1afdf37b26ef10475",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "  0%|          | 0/100 [00:00<?, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "from tqdm.auto import tqdm\n",
    "\n",
    "# Ask the RAG service every test question; results are collected keyed by question text\n",
    "answer_dict = {}\n",
    "for idx, row in tqdm(prediction_df.iterrows(), total=len(prediction_df)):\n",
    "    question = row['question']\n",
    "    answer_dict[question] = {\n",
    "        'uuid': row['uuid'],\n",
    "        'ref_answer': row['ref_answer'],\n",
    "        'gen_answer': rag(question)['text'],\n",
    "    }"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "id": "7416c321-1b70-47ee-9968-d1fac2af53c4",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-11-17T12:54:32.176230Z",
     "iopub.status.busy": "2024-11-17T12:54:32.176068Z",
     "iopub.status.idle": "2024-11-17T12:54:32.179437Z",
     "shell.execute_reply": "2024-11-17T12:54:32.179101Z",
     "shell.execute_reply.started": "2024-11-17T12:54:32.176217Z"
    }
   },
   "outputs": [],
   "source": [
    "# Look up the generated answer for every question and store it as a new column\n",
    "prediction_df.loc[:, 'gen_answer'] = prediction_df['question'].apply(\n",
    "    lambda question_text: answer_dict[question_text]['gen_answer']\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "id": "9e543968-c408-4604-9cbc-f80c17994ac4",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-11-17T12:54:32.180094Z",
     "iopub.status.busy": "2024-11-17T12:54:32.179911Z",
     "iopub.status.idle": "2024-11-17T12:54:32.188830Z",
     "shell.execute_reply": "2024-11-17T12:54:32.188495Z",
     "shell.execute_reply.started": "2024-11-17T12:54:32.180082Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>uuid</th>\n",
       "      <th>question</th>\n",
       "      <th>qa_type</th>\n",
       "      <th>ref_answer</th>\n",
       "      <th>gen_answer</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>385</th>\n",
       "      <td>ddfb44a5-ffa5-4973-a79b-662a0bfeb491</td>\n",
       "      <td>浮动利率房贷在新增房贷中的占比有何变化，为什么会有这种变化？</td>\n",
       "      <td>large_context</td>\n",
       "      <td>浮动利率房贷在新增房贷中的占比上升，这是因为它们通常比固定利率低100个基点以上，这能帮助债...</td>\n",
       "      <td>浮动利率房贷在新增房贷中的占比上升。因为30年房贷利率预计将在2025年一季度下降到6.4%...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>320</th>\n",
       "      <td>ecc12483-d88f-41af-93a6-16c48d4ef147</td>\n",
       "      <td>2023年二季度，银行业的商业房地产贷款拖欠率是多少？</td>\n",
       "      <td>detailed</td>\n",
       "      <td>0.8%</td>\n",
       "      <td>2023年二季度，银行业的商业房地产贷款拖欠率为0.8%。</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>60</th>\n",
       "      <td>774ab320-aba8-45a7-a5f4-791782b46d08</td>\n",
       "      <td>哪些因素影响了欧元区和英国的经济增长？</td>\n",
       "      <td>detailed</td>\n",
       "      <td>就业市场韧性消退、内外部需求回落、通胀和利率水平高企、地缘冲突余波未散</td>\n",
       "      <td>欧洲央行收紧货币政策、通胀高企抑制消费能力、疫情放开的促进效应逐步消减以及全球经济放缓导致出...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>174</th>\n",
       "      <td>05696bdf-74d8-4788-8e18-86439fa0bf0c</td>\n",
       "      <td>英格兰银行在6月22日的利率变动了多少基点?</td>\n",
       "      <td>detailed</td>\n",
       "      <td>50基点</td>\n",
       "      <td>英格兰银行在6月22日的利率变动了50基点。</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>24</th>\n",
       "      <td>1b782370-c7f8-48a8-9647-591bb91f27f7</td>\n",
       "      <td>全球供应链压力指数降至有记录以来最低值的时间点是什么时候？</td>\n",
       "      <td>detailed</td>\n",
       "      <td>2023年10月底</td>\n",
       "      <td>全球供应链压力指数降至有记录以来最低值的时间点是截至2023年10月底。</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                                     uuid                        question  \\\n",
       "385  ddfb44a5-ffa5-4973-a79b-662a0bfeb491  浮动利率房贷在新增房贷中的占比有何变化，为什么会有这种变化？   \n",
       "320  ecc12483-d88f-41af-93a6-16c48d4ef147     2023年二季度，银行业的商业房地产贷款拖欠率是多少？   \n",
       "60   774ab320-aba8-45a7-a5f4-791782b46d08             哪些因素影响了欧元区和英国的经济增长？   \n",
       "174  05696bdf-74d8-4788-8e18-86439fa0bf0c          英格兰银行在6月22日的利率变动了多少基点?   \n",
       "24   1b782370-c7f8-48a8-9647-591bb91f27f7   全球供应链压力指数降至有记录以来最低值的时间点是什么时候？   \n",
       "\n",
       "           qa_type                                         ref_answer  \\\n",
       "385  large_context  浮动利率房贷在新增房贷中的占比上升，这是因为它们通常比固定利率低100个基点以上，这能帮助债...   \n",
       "320       detailed                                               0.8%   \n",
       "60        detailed                就业市场韧性消退、内外部需求回落、通胀和利率水平高企、地缘冲突余波未散   \n",
       "174       detailed                                               50基点   \n",
       "24        detailed                                          2023年10月底   \n",
       "\n",
       "                                            gen_answer  \n",
       "385  浮动利率房贷在新增房贷中的占比上升。因为30年房贷利率预计将在2025年一季度下降到6.4%...  \n",
       "320                      2023年二季度，银行业的商业房地产贷款拖欠率为0.8%。  \n",
       "60   欧洲央行收紧货币政策、通胀高企抑制消费能力、疫情放开的促进效应逐步消减以及全球经济放缓导致出...  \n",
       "174                             英格兰银行在6月22日的利率变动了50基点。  \n",
       "24                全球供应链压力指数降至有记录以来最低值的时间点是截至2023年10月底。  "
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "prediction_df.sample(5)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "326687c0-0a32-4d51-a846-b164ed11c1c6",
   "metadata": {},
   "source": [
    "# 评估"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "id": "72104fef-3eb4-45cb-b449-c8dc71281016",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-11-17T12:54:32.190030Z",
     "iopub.status.busy": "2024-11-17T12:54:32.189896Z",
     "iopub.status.idle": "2024-11-17T12:54:32.591791Z",
     "shell.execute_reply": "2024-11-17T12:54:32.591385Z",
     "shell.execute_reply.started": "2024-11-17T12:54:32.190017Z"
    }
   },
   "outputs": [],
   "source": [
    "from langchain_openai import ChatOpenAI\n",
    "\n",
    "# Judge model used for grading; endpoint and key come from the LLM_* environment variables\n",
    "judge_llm = ChatOpenAI(\n",
    "    model_name='qwen2-72b-instruct',\n",
    "    temperature=0,\n",
    "    base_url=os.environ['LLM_BASE_URL'],\n",
    "    api_key=os.environ['LLM_API_KEY'],\n",
    ")\n",
    "\n",
    "import time\n",
    "\n",
    "def evaluate(prediction_df, sleep_seconds=1):\n",
    "    \"\"\"\n",
    "    Grade every generated answer against its reference answer using the judge LLM.\n",
    "\n",
    "    :param prediction_df: prediction results; must provide the columns question, ref_answer and gen_answer\n",
    "    :param sleep_seconds: pause in seconds after each judge call; the default of 1 keeps the original pacing\n",
    "    :return: list of the judge model's raw replies, one per row in iteration order\n",
    "    \"\"\"\n",
    "    prompt_tmpl = \"\"\"\n",
    "你是一个经济学博士，现在我有一系列问题，有一个助手已经对这些问题进行了回答，你需要参照参考答案，评价这个助手的回答是否正确，仅回复“是”或“否”即可，不要带其他描述性内容或无关信息。\n",
    "问题：\n",
    "<question>\n",
    "{{question}}\n",
    "</question>\n",
    "\n",
    "参考答案：\n",
    "<ref_answer>\n",
    "{{ref_answer}}\n",
    "</ref_answer>\n",
    "\n",
    "助手回答：\n",
    "<gen_answer>\n",
    "{{gen_answer}}\n",
    "</gen_answer>\n",
    "请评价：\n",
    "    \"\"\"\n",
    "    # Placeholders are filled in this fixed order: question, reference answer, generated answer\n",
    "    placeholder_columns = (\n",
    "        ('{{question}}', 'question'),\n",
    "        ('{{ref_answer}}', 'ref_answer'),\n",
    "        ('{{gen_answer}}', 'gen_answer'),\n",
    "    )\n",
    "    results = []\n",
    "\n",
    "    for _, row in tqdm(prediction_df.iterrows(), total=len(prediction_df)):\n",
    "        prompt = prompt_tmpl\n",
    "        for placeholder, column in placeholder_columns:\n",
    "            # str() guards against non-string cells (e.g. a numeric answer or a missing value),\n",
    "            # which would make str.replace raise TypeError\n",
    "            prompt = prompt.replace(placeholder, str(row[column]))\n",
    "\n",
    "        result = judge_llm.invoke(prompt.strip()).content\n",
    "        results.append(result)\n",
    "\n",
    "        # Pause between requests; presumably to stay under the judge API's rate limit\n",
    "        time.sleep(sleep_seconds)\n",
    "    return results"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "id": "6d684e34-8c42-455e-9d2d-fe05a044fbba",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-11-17T12:54:32.592403Z",
     "iopub.status.busy": "2024-11-17T12:54:32.592265Z",
     "iopub.status.idle": "2024-11-17T12:56:56.091016Z",
     "shell.execute_reply": "2024-11-17T12:56:56.090557Z",
     "shell.execute_reply.started": "2024-11-17T12:54:32.592391Z"
    }
   },
   "outputs": [
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "9106c111f9e84ce3b8e9ca16b7dd18ca",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "  0%|          | 0/100 [00:00<?, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "prediction_df['raw_score'] = evaluate(prediction_df)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "id": "e4adb904-42e9-4735-b6a0-90c34207bbc8",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-11-17T12:56:56.091994Z",
     "iopub.status.busy": "2024-11-17T12:56:56.091556Z",
     "iopub.status.idle": "2024-11-17T12:56:56.095041Z",
     "shell.execute_reply": "2024-11-17T12:56:56.094678Z",
     "shell.execute_reply.started": "2024-11-17T12:56:56.091977Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array(['是', '否'], dtype=object)"
      ]
     },
     "execution_count": 12,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "prediction_df['raw_score'].unique()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "id": "ef330e4a-cd66-409d-9233-0b0aeb1a02b4",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-11-17T12:56:56.095680Z",
     "iopub.status.busy": "2024-11-17T12:56:56.095535Z",
     "iopub.status.idle": "2024-11-17T12:56:56.102123Z",
     "shell.execute_reply": "2024-11-17T12:56:56.101666Z",
     "shell.execute_reply.started": "2024-11-17T12:56:56.095666Z"
    }
   },
   "outputs": [],
   "source": [
    "# Normalize the judge's reply before comparing: surrounding whitespace or a trailing full stop\n",
    "# (e.g. '是。') would otherwise be silently scored as a wrong answer\n",
    "prediction_df['score'] = (\n",
    "    prediction_df['raw_score'].astype(str).str.strip().str.rstrip('。.') == '是'\n",
    ").astype(int)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "id": "3b1ea28f-bfac-41d0-a2bb-706bfb403b05",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-11-17T12:56:56.102715Z",
     "iopub.status.busy": "2024-11-17T12:56:56.102584Z",
     "iopub.status.idle": "2024-11-17T12:56:56.109565Z",
     "shell.execute_reply": "2024-11-17T12:56:56.107559Z",
     "shell.execute_reply.started": "2024-11-17T12:56:56.102702Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.78"
      ]
     },
     "execution_count": 14,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "prediction_df['score'].mean()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "e1eb91d3-cc18-48a3-be05-3c5cbb6beffb",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.9"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
